{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to XGBoost-Spark Cross Validation with GPU\n",
    "\n",
    "The goal of this notebook is to show you how to levarage GPU to accelerate XGBoost spark cross validatoin for hyperparameter tuning. The best model for the given hyperparameters will be returned.\n",
    "\n",
    "Here takes the application 'Taxi' as an example.\n",
    "\n",
    "A few libraries are required for this notebook:\n",
    "  1. cudf-cu11\n",
    "  2. xgboost\n",
    "  3. scikit-learn\n",
    "  4. numpy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Import the Required Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xgboost.spark import SparkXGBRegressor, SparkXGBRegressorModel\n",
    "from pyspark.ml.tuning import ParamGridBuilder, CrossValidator\n",
    "from pyspark.ml.evaluation import RegressionEvaluator\n",
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.types import FloatType, IntegerType, StructField, StructType\n",
    "from time import time\n",
    "from pyspark.conf import SparkConf\n",
    "import os\n",
    "# os.environ['PYSPARK_PYTHON'] = \"./environment/bin/python\"\n",
    "# os.environ['PYSPARK_DRIVER_PYTHON'] = \"./environment/bin/python\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create a Spark Session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-11-30 08:02:09,748 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
      "2022-11-30 08:02:10,103 WARN resource.ResourceUtils: The configuration of cores (exec = 2 task = 1, runnable tasks = 2) will result in wasted resources due to resource gpu limiting the number of runnable tasks per executor to: 1. Please adjust your configuration.\n",
      "2022-11-30 08:02:23,737 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator 25.02.1 using cudf 25.02.1.\n",
      "2022-11-30 08:02:23,752 WARN rapids.RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n",
      "2022-11-30 08:02:23,756 WARN rapids.RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set `spark.rapids.sql.enabled` to false.\n",
      "2022-11-30 08:02:23,757 WARN rapids.RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n",
      "2022-11-30 08:02:24,226 WARN yarn.Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.\n"
     ]
    }
   ],
   "source": [
    "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"/your-url\")\n",
    "\n",
    "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-jar-path\")\n",
    "\n",
    "# You need to update with your real hardware resource \n",
    "driverMem = os.getenv(\"DRIVER_MEM\", \"2g\")\n",
    "executorMem = os.getenv(\"EXECUTOR_MEM\", \"2g\")\n",
    "pinnedPoolSize = os.getenv(\"PINNED_POOL_SIZE\", \"2g\")\n",
    "concurrentGpuTasks = os.getenv(\"CONCURRENT_GPU_TASKS\", \"2\")\n",
    "executorCores = int(os.getenv(\"EXECUTOR_CORES\", \"2\"))\n",
    "# Common spark settings\n",
    "conf = SparkConf()\n",
    "conf.setMaster(SPARK_MASTER_URL)\n",
    "conf.setAppName(\"Microbenchmark on GPU\")\n",
    "conf.set(\"spark.executor.instances\",\"1\")\n",
    "conf.set(\"spark.driver.memory\", driverMem)\n",
    "## The tasks will run on GPU memory, so there is no need to set a high host memory\n",
    "conf.set(\"spark.executor.memory\", executorMem)\n",
    "## The tasks will run on GPU cores, so there is no need to use many cpu cores\n",
    "conf.set(\"spark.executor.cores\", executorCores)\n",
    "\n",
    "# Plugin settings\n",
    "conf.set(\"spark.executor.resource.gpu.amount\", \"1\")\n",
    "conf.set(\"spark.rapids.sql.concurrentGpuTasks\", concurrentGpuTasks)\n",
    "conf.set(\"spark.rapids.memory.pinnedPool.size\", pinnedPoolSize)\n",
    "conf.set(\"spark.rapids.memory.gpu.allocFraction\",\"0.7\")\n",
    "conf.set(\"spark.locality.wait\",\"0\")\n",
    "##############note: only support value=1 https://github.com/dmlc/xgboost/blame/master/python-package/xgboost/spark/core.py#L370-L374\n",
    "conf.set(\"spark.task.resource.gpu.amount\", 1) \n",
    "conf.set(\"spark.rapids.sql.enabled\", \"true\") \n",
    "conf.set(\"spark.plugins\", \"com.nvidia.spark.SQLPlugin\")\n",
    "conf.set(\"spark.sql.cache.serializer\",\"com.nvidia.spark.ParquetCachedBatchSerializer\")\n",
    "conf.set(\"spark.sql.execution.arrow.maxRecordsPerBatch\", 200000) \n",
    "conf.set(\"spark.driver.extraClassPath\", RAPIDS_JAR)\n",
    "conf.set(\"spark.executor.extraClassPath\", RAPIDS_JAR)\n",
    "# if you pass/unpack the archive file and enable the environment\n",
    "# conf.set(\"spark.yarn.dist.archives\", \"your_pyspark_venv.tar.gz#environment\")\n",
    "# Create spark session\n",
    "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n",
    "\n",
    "reader = spark.read"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Specify the Data Schema and Load the Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "label = 'fare_amount'\n",
    "schema = StructType([\n",
    "    StructField('vendor_id', FloatType()),\n",
    "    StructField('passenger_count', FloatType()),\n",
    "    StructField('trip_distance', FloatType()),\n",
    "    StructField('pickup_longitude', FloatType()),\n",
    "    StructField('pickup_latitude', FloatType()),\n",
    "    StructField('rate_code', FloatType()),\n",
    "    StructField('store_and_fwd', FloatType()),\n",
    "    StructField('dropoff_longitude', FloatType()),\n",
    "    StructField('dropoff_latitude', FloatType()),\n",
    "    StructField(label, FloatType()),\n",
    "    StructField('hour', FloatType()),\n",
    "    StructField('year', IntegerType()),\n",
    "    StructField('month', IntegerType()),\n",
    "    StructField('day', FloatType()),\n",
    "    StructField('day_of_week', FloatType()),\n",
    "    StructField('is_weekend', FloatType()),\n",
    "])\n",
    "\n",
    "features = [ x.name for x in schema if x.name != label ]\n",
    "\n",
    "# You need to update them to your real paths!\n",
    "dataRoot = os.getenv(\"DATA_ROOT\", \"/data\")\n",
    "train_path = dataRoot + \"/taxi/csv/train\"\n",
    "eval_path = dataRoot + \"/taxi/csv/test\"\n",
    "\n",
    "data_format = 'csv'\n",
    "has_header = 'true'\n",
    "if data_format == 'csv':\n",
    "    train_data = reader.schema(schema).option('header',has_header).csv(train_path)\n",
    "    trans_data = reader.schema(schema).option('header',has_header).csv(eval_path)\n",
    "else :\n",
    "    train_data = reader.load(train_path)\n",
    "    trans_data = reader.load(eval_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Build a XGBoost-Spark CrossValidator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# First build a regressor of GPU version using *setFeaturesCols* to set feature columns\n",
    "params = { \n",
    "    \"tree_method\": \"gpu_hist\",\n",
    "    \"grow_policy\": \"depthwise\",\n",
    "    \"num_workers\": 1,\n",
    "    \"device\": \"cuda\",\n",
    "}\n",
    "params['features_col'] = features\n",
    "params['label_col'] = label\n",
    "\n",
    "regressor = SparkXGBRegressor(**params)\n",
    "# Then build the evaluator and the hyperparameters\n",
    "evaluator = (RegressionEvaluator()\n",
    "    .setLabelCol(label))\n",
    "param_grid = (ParamGridBuilder()\n",
    "    .addGrid(regressor.max_depth, [3, 6])\n",
    "    .addGrid(regressor.n_estimators, [100, 200])\n",
    "    .build())\n",
    "# Finally the corss validator\n",
    "cross_validator = (CrossValidator()\n",
    "    .setEstimator(regressor)\n",
    "    .setEvaluator(evaluator)\n",
    "    .setEstimatorParamMaps(param_grid)\n",
    "    .setNumFolds(2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Start Cross Validation by Fitting Data to CrossValidator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "If features_cols param set, then features_col param is ignored.\n",
      "/data/home/yuanli/work/reviews/pr252/pyspark_venv_20221125/lib/python3.8/site-packages/xgboost/sklearn.py:808: UserWarning: Loading a native XGBoost model with Scikit-Learn interface.\n",
      "  warnings.warn(\"Loading a native XGBoost model with Scikit-Learn interface.\")\n",
      "2022-11-30 08:03:14,308 WARN rapids.GpuOverrides: \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#889, fare_amount#890, 1.0#891, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#889 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#890 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#891 could run on GPU\n",
      "  !Expression <AttributeReference> obj#895 cannot run on GPU because expression AttributeReference obj#895 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "2022-11-30 08:03:14,317 WARN util.package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:20,073 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#1789, fare_amount#1790, 1.0#1791, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#1789 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#1790 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#1791 could run on GPU\n",
      "  !Expression <AttributeReference> obj#1795 cannot run on GPU because expression AttributeReference obj#1795 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:23,687 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#2689, fare_amount#2690, 1.0#2691, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#2689 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#2690 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#2691 could run on GPU\n",
      "  !Expression <AttributeReference> obj#2695 cannot run on GPU because expression AttributeReference obj#2695 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:27,457 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#3589, fare_amount#3590, 1.0#3591, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#3589 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#3590 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#3591 could run on GPU\n",
      "  !Expression <AttributeReference> obj#3595 cannot run on GPU because expression AttributeReference obj#3595 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:30,964 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#4659, fare_amount#4660, 1.0#4661, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#4659 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#4660 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#4661 could run on GPU\n",
      "  !Expression <AttributeReference> obj#4665 cannot run on GPU because expression AttributeReference obj#4665 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:34,524 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#5559, fare_amount#5560, 1.0#5561, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#5559 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#5560 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#5561 could run on GPU\n",
      "  !Expression <AttributeReference> obj#5565 cannot run on GPU because expression AttributeReference obj#5565 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:38,067 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#6459, fare_amount#6460, 1.0#6461, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#6459 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#6460 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#6461 could run on GPU\n",
      "  !Expression <AttributeReference> obj#6465 cannot run on GPU because expression AttributeReference obj#6465 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n",
      "If features_cols param set, then features_col param is ignored.\n",
      "2022-11-30 08:03:41,793 WARN rapids.GpuOverrides:                               \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#7359, fare_amount#7360, 1.0#7361, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#7359 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#7360 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#7361 could run on GPU\n",
      "  !Expression <AttributeReference> obj#7365 cannot run on GPU because expression AttributeReference obj#7365 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "If features_cols param set, then features_col param is ignored.\n",
      "[Stage 34:>                                                         (0 + 1) / 1]\r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cross-Validation takes 55.19 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\r",
      "                                                                                \r"
     ]
    }
   ],
   "source": [
    "def with_benchmark(phrase, action):\n",
    "    start = time()\n",
    "    result = action()\n",
    "    end = time()\n",
    "    print('{} takes {} seconds'.format(phrase, round(end - start, 2)))\n",
    "    return result\n",
    "model = with_benchmark('Cross-Validation', lambda: cross_validator.fit(train_data)).bestModel"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Transform On the Best Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Transforming takes 0.23 seconds\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-11-30 08:03:45,503 WARN rapids.GpuOverrides: \n",
      "!Exec <CollectLimitExec> cannot run on GPU because the Exec CollectLimitExec has been disabled, and is disabled by default because Collect Limit replacement can be slower on the GPU, if huge number of rows in a batch it could help by limiting the number of rows transferred from GPU to CPU. Set spark.rapids.sql.exec.CollectLimitExec to true if you wish to enable it\n",
      "  @Partitioning <SinglePartition$> could run on GPU\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----------+-----------+\n",
      "|fare_amount| prediction|\n",
      "+-----------+-----------+\n",
      "|        5.0| 5.01032114|\n",
      "|       34.0|  31.134758|\n",
      "|       10.0|9.288980484|\n",
      "|       16.5|15.33446312|\n",
      "|        7.0|8.197098732|\n",
      "+-----------+-----------+\n",
      "only showing top 5 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "def transform():\n",
    "    result = model.transform(trans_data).cache()\n",
    "    result.foreachPartition(lambda _: None)\n",
    "    return result\n",
    "result = with_benchmark('Transforming', transform)\n",
    "result.select(label, 'prediction').show(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluation takes 0.05 seconds\n",
      "RMSE is 2.055690464034438\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-11-30 08:03:45,728 WARN rapids.GpuOverrides: \n",
      "! <DeserializeToObjectExec> cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n",
      "  ! <CreateExternalRow> createexternalrow(prediction#7645, fare_amount#8271, 1.0#8272, StructField(prediction,DoubleType,true), StructField(fare_amount,DoubleType,true), StructField(1.0,DoubleType,false)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CreateExternalRow\n",
      "    @Expression <AttributeReference> prediction#7645 could run on GPU\n",
      "    @Expression <AttributeReference> fare_amount#8271 could run on GPU\n",
      "    @Expression <AttributeReference> 1.0#8272 could run on GPU\n",
      "  !Expression <AttributeReference> obj#8276 cannot run on GPU because expression AttributeReference obj#8276 produces an unsupported type ObjectType(interface org.apache.spark.sql.Row)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "accuracy = with_benchmark(\n",
    "    'Evaluation',\n",
    "    lambda: RegressionEvaluator().setLabelCol(label).evaluate(result))\n",
    "print('RMSE is ' + str(accuracy))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "spark.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
